This data set contains 113,937 loans with 81 variables on each loan, including loan amount, borrower rate (or interest rate), current loan status, borrower income, and many others.
- This data dictionary explains the variables in the data set.
- You are not expected to explore all of the variables in the dataset! Focus your exploration on about 10-15 of them.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv("prosperLoanData.csv")
df.head()
| ListingKey | ListingNumber | ListingCreationDate | CreditGrade | Term | LoanStatus | ClosedDate | BorrowerAPR | BorrowerRate | LenderYield | ... | LP_ServiceFees | LP_CollectionFees | LP_GrossPrincipalLoss | LP_NetPrincipalLoss | LP_NonPrincipalRecoverypayments | PercentFunded | Recommendations | InvestmentFromFriendsCount | InvestmentFromFriendsAmount | Investors | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1021339766868145413AB3B | 193129 | 2007-08-26 19:09:29.263000000 | C | 36 | Completed | 2009-08-14 00:00:00 | 0.16516 | 0.1580 | 0.1380 | ... | -133.18 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | 0.0 | 258 |
| 1 | 10273602499503308B223C1 | 1209647 | 2014-02-27 08:28:07.900000000 | NaN | 36 | Current | NaN | 0.12016 | 0.0920 | 0.0820 | ... | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | 0.0 | 1 |
| 2 | 0EE9337825851032864889A | 81716 | 2007-01-05 15:00:47.090000000 | HR | 36 | Completed | 2009-12-17 00:00:00 | 0.28269 | 0.2750 | 0.2400 | ... | -24.20 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | 0.0 | 41 |
| 3 | 0EF5356002482715299901A | 658116 | 2012-10-22 11:02:35.010000000 | NaN | 36 | Current | NaN | 0.12528 | 0.0974 | 0.0874 | ... | -108.01 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | 0.0 | 158 |
| 4 | 0F023589499656230C5E3E2 | 909464 | 2013-09-14 18:38:39.097000000 | NaN | 36 | Current | NaN | 0.24614 | 0.2085 | 0.1985 | ... | -60.27 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 | 0.0 | 20 |
5 rows × 81 columns
81 columns and 113937 rows
the loan amounts and loan amount > 10000 the most convinient way to get it should have which factors.
Credit Score/grade , Prosper Score , borrower Rate , Occupation , etc
# A Count plot of Loan Status in genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(12 , reverse=True)
sns.countplot(x=df['LoanStatus'], ax=ax , order=df['LoanStatus'].value_counts().index , palette=cmap)
plt.xticks(rotation=15)
plt.title("Diffrent Loan Status Counts (in gen)")
for i, v in df['LoanStatus'].value_counts().reset_index().iterrows():
ax.text(i, v.LoanStatus + 0.2 , v.LoanStatus, color='red')
plt.xlabel("Types of loan status")
plt.ylabel("Count of no of loans");
# A Count plot of Loan Status in elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(12 , reverse=True)
sns.countplot(x=df.query("LoanOriginalAmount > 10000")['LoanStatus'], ax=ax , order=df.query("LoanOriginalAmount > 10000")['LoanStatus'].value_counts().index , palette=cmap)
plt.xticks(rotation=15)
plt.title("Diffrent Loan Status Counts (in elite)")
for i, v in df.query("LoanOriginalAmount > 10000")['LoanStatus'].value_counts().reset_index().iterrows():
ax.text(i, v.LoanStatus + 0.2 , v.LoanStatus, color='red')
plt.xlabel("types of loan status")
plt.ylabel("Count of no of loans");
# A Count plot of Listing Category for genral loans
new = ["Not Available" , "Debt Consolidation", "Home Improvement" , "Business" , "Personal Loan" , "Student Use" , "Auto" , "Other" , "Baby&Adoption",
"Boat" , "Cosmetic Procedure" , "Engagement Ring" , "Green Loans" , "Household Expenses" , "Large Purchases" , "Medical/Dental" , "Motorcycle" ,
"RV" , "Taxes" , "Vacation" , "Wedding Loans"]
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(20 , reverse=True)
sns.countplot(x=df['ListingCategory'], ax=ax , order=df['ListingCategory'].value_counts().index , palette=cmap)
labels = [item.get_text() for item in ax.get_xticklabels()]
for i in range(21):
labels[i] = new[int(labels[i])]
ax.set_xticklabels(labels)
plt.xticks(rotation=25)
plt.title("Loans given out according to Category (in gen)")
plt.xlabel("Categories of Loan types")
plt.ylabel("Count of no of loans");
for i, v in df['ListingCategory'].value_counts().reset_index().iterrows():
ax.text(i, v.ListingCategory + 0.2 , v.ListingCategory, color='red');
# A Count plot of Listing Category for Elite Loans
new = ["Not Available" , "Debt Consolidation", "Home Improvement" , "Business" , "Personal Loan" , "Student Use" , "Auto" , "Other" , "Baby&Adoption",
"Boat" , "Cosmetic Procedure" , "Engagement Ring" , "Green Loans" , "Household Expenses" , "Large Purchases" , "Medical/Dental" , "Motorcycle" ,
"RV" , "Taxes" , "Vacation" , "Wedding Loans"]
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(20 , reverse=True)
sns.countplot(x=df.query("LoanOriginalAmount > 10000")['ListingCategory'], ax=ax , order=df.query("LoanOriginalAmount > 10000")['ListingCategory'].value_counts().index , palette=cmap)
labels = [item.get_text() for item in ax.get_xticklabels()]
for i in range(21):
labels[i] = new[int(labels[i])]
ax.set_xticklabels(labels)
plt.xticks(rotation=25)
plt.title("Loans given out according to Category (in elite)")
plt.xlabel("Categories of Loan types")
plt.ylabel("Count of no of loans");
for i, v in df.query("LoanOriginalAmount > 10000")['ListingCategory'].value_counts().reset_index().iterrows():
ax.text(i, v.ListingCategory + 0.2 , v.ListingCategory, color='red');
# A Count plot of Diffent occupations in genral
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(20 , reverse=True)
sns.countplot(x=df["Occupation"], ax=ax , order=df["Occupation"].value_counts().iloc[2:20].index , palette=cmap)
plt.xticks(rotation=45)
plt.title("Count of loans given out by Ocuupation (in gen)")
plt.xlabel("Diffrent types of Occupations")
plt.ylabel("Count of no of loans");
for i, v in df["Occupation"].value_counts().iloc[2:20].reset_index().iterrows():
ax.text(i, v.Occupation + 0.2 , v.Occupation, color='red');
# A Count plot of Diffent occupations in elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(20 , reverse=True)
sns.countplot(x=df.query("LoanOriginalAmount > 10000")["Occupation"], ax=ax , order=df.query("LoanOriginalAmount > 10000")["Occupation"].value_counts().iloc[2:20].index , palette=cmap)
plt.xticks(rotation=45)
plt.title("Count of loans given out by Ocuupation (in elite)")
plt.xlabel("Diffrent types of occupations")
plt.ylabel("Count of no of loans");
for i, v in df.query("LoanOriginalAmount > 10000")["Occupation"].value_counts().iloc[2:20].reset_index().iterrows():
ax.text(i, v.Occupation + 0.2 , v.Occupation, color='red');
# A Count plot of Diffent Credit Grades in genral
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(9 , reverse=True)
sns.countplot(x=df["CreditGrade"], ax=ax , order=df["CreditGrade"].value_counts().index , palette=cmap)
plt.title("Count of loans given out by Credit Grade (in gen)")
plt.xlabel("Credit Grades of people applying for loans")
plt.ylabel("Count of no of loans");
for i, v in df["CreditGrade"].value_counts().reset_index().iterrows():
ax.text(i, v.CreditGrade + 0.2 , v.CreditGrade, color='red');
# A Count plot of Diffent Credit Grades in elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(9 , reverse=True)
sns.countplot(x=df.query("LoanOriginalAmount > 10000")["CreditGrade"], ax=ax , order=df.query("LoanOriginalAmount > 10000")["CreditGrade"].value_counts().index , palette=cmap)
plt.title("Count of loans given out by Credit Grade (in elite)")
plt.xlabel("Credit Grades of people applying for loans")
plt.ylabel("Count of no of loans");
for i, v in df.query("LoanOriginalAmount > 10000")["CreditGrade"].value_counts().reset_index().iterrows():
ax.text(i, v.CreditGrade + 0.2 , v.CreditGrade, color='red');
# A Count plot of Diffent Employment Status in genral
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(9 , reverse=True)
sns.countplot(x=df["EmploymentStatus"], ax=ax , order=df["EmploymentStatus"].value_counts().index , palette=cmap)
plt.title("Count of loans given out by Employment Status (in gen)")
plt.xlabel("Employment status of the people applying for loan")
plt.ylabel("Count of no of loans");
for i, v in df["EmploymentStatus"].value_counts().reset_index().iterrows():
ax.text(i, v.EmploymentStatus + 0.2 , v.EmploymentStatus, color='red');
# A Count plot of Diffent Employment Status in elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(9 , reverse=True)
sns.countplot(x=df.query("LoanOriginalAmount > 10000")["EmploymentStatus"], ax=ax , order=df.query("LoanOriginalAmount > 10000")["EmploymentStatus"].value_counts().index , palette=cmap)
plt.title("Count of loans given out by Employment Status (in elite)")
plt.xlabel("Employment status of the people applying for loan")
plt.ylabel("Count of no of loans");
for i, v in df.query("LoanOriginalAmount > 10000")["EmploymentStatus"].value_counts().reset_index().iterrows():
ax.text(i, v.EmploymentStatus + 0.2 , v.EmploymentStatus, color='red');
# ahistogram depecting the relationship between diffrent ranges of loan amount and no of people who recieved it
fig, ax = plt.subplots(figsize=(20 , 10))
bins = np.arange(1000 , df['LoanOriginalAmount'].max()+1000 , 3000)
plt.hist(df['LoanOriginalAmount'] ,bins=bins , ec='blue' , alpha=0.6)
plt.xticks(bins , rotation=30)
plt.xlabel("loan amount")
plt.ylabel("no of people who recieved loan")
plt.title("Count of loans given out by Total Loan Amount");
# a histogram depicting the relationship between the ranges of prosper Scores and no of people having prosper
# score between that prosper score in genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
bins = np.arange(df['ProsperScore'].min() , df['ProsperScore'].max()+1 , 1)
plt.hist(df['ProsperScore'] ,bins=bins , ec='blue' , alpha=0.6)
plt.xticks(bins , rotation=30)
plt.xlabel("Prosper Scores")
plt.ylabel("no of people with that Prosper Score")
plt.title("Count of peoples with given Prosper Score (in gen)");
# a histogram depicting the relationship between the ranges of prosper Scores and no of people having prosper
# score between that prosper score in elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
bins = np.arange(df.query("LoanOriginalAmount > 10000")['ProsperScore'].min() , df.query("LoanOriginalAmount > 10000")['ProsperScore'].max()+1 , 1)
plt.hist(df.query("LoanOriginalAmount > 10000")['ProsperScore'] ,bins=bins , ec='green' ,color = 'green' , alpha=0.6)
plt.xticks(bins , rotation=30)
plt.xlabel("Prosper Scores")
plt.ylabel("no of people with that Prosper Score")
plt.title("Count of peoples with given Prosper Score (in elite)");
# A scatterplot depicting a relationship between BorrowerAPR and BorrowerRate in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.scatterplot(data=df , x="BorrowerAPR" , y="BorrowerRate")
plt.title("Borrower APR vs Borrower Rate (in gen)");
# A scatterplot depicting a relationship between BorrowerAPR and BorrowerRate in case of elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.scatterplot(data=df.query("LoanOriginalAmount > 10000") , x="BorrowerAPR" , y="BorrowerRate")
plt.title("Borrower APR vs Borrower Rate (in elite)");
# A violinplot depicting a relationship between Term and BorrowerRate in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.violinplot(data=df , x="Term" , y="BorrowerRate" , color='blue' );
plt.title("rerm vs BorrowerRate (in gen)");
plt.xlabel("Terms available in months");
# A violinplot depicting a relationship between Term and BorrowerRate in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.violinplot(data=df.query("LoanOriginalAmount > 10000") , x="Term" , y="BorrowerRate" , color='green' );
plt.title("rerm vs BorrowerRate (in eite)");
plt.xlabel("Terms available in months");
# A boxplot depicting a relationship between EmploymentStatus and LoanOriginalAmount in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = [[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.1750865648952205, 0.11840023306916837, 0.24215989137836502],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],]
sns.boxplot(data=df , x="EmploymentStatus" , y="LoanOriginalAmount" , palette=cmap)
plt.xticks(rotation=25)
plt.xlabel("Current Employment status of the borrower")
plt.ylabel("Original loan amount in dollars");
plt.title("Employment Status vs Loan Original Amount (in gen)");
# A boxplot depicting a relationship between EmploymentStatus and LoanOriginalAmount in case of elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = [[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.1750865648952205, 0.11840023306916837, 0.24215989137836502],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],
[0.9312692223325372, 0.8201921796082118, 0.7971480974663592],]
sns.boxplot(data=df.query("LoanOriginalAmount > 10000") , x="EmploymentStatus" , y="LoanOriginalAmount" , palette=cmap)
plt.xticks(rotation=25)
plt.title("Employment Status vs Loan Original Amount (in elite)");
plt.xlabel("Employment status of the people")
plt.ylabel("Original loan amount in dollars");
# A boxplot depicting a relationship between Prosper Score and LoanOriginalAmount in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(11 )
sns.boxplot(data=df , x="ProsperScore" , y="LoanOriginalAmount" , palette=cmap)
plt.xticks(rotation=25);
plt.title("prosper score vs loanAmount (in gen)");
plt.xlabel("Prosper Score of the people")
plt.ylabel("Original loan amount in dollars");
# A boxplot depicting a relationship between Prosper Score and LoanOriginalAmount in case of elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(11 )
sns.boxplot(data=df.query("LoanOriginalAmount > 10000") , x="ProsperScore" , y="LoanOriginalAmount" , palette=cmap)
plt.xticks(rotation=25);
plt.title("prosper score vs loanAmount (in elite)");
plt.xlabel("Prosper Score of the people")
plt.ylabel("Original loan amount in dollars");
# a stripplot that depicts the relationship between CreditScoreAvg and LoanOriginalAmount with new_emp
# as the third variable in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(data=df , x="CreditScoreAvg" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit score vs loanAmount (in gen)");
plt.xlabel("Credit Scores of people")
plt.ylabel("Original loan amount in dollars");
# a stripplot that depicts the relationship between CreditScoreAvg and LoanOriginalAmount with new_emp
# as the third variable in case of elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(data=df.query("LoanOriginalAmount > 10000") , x="CreditScoreAvg" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit score vs loanAmount wrt employ status (in elite)");
plt.xlabel("Credit Score of people")
plt.ylabel("Original loan amount in dollars");
# a boxplot that depicts the relationship between Credit Grade and LoanOriginalAmount with new_emp
# as the third variable in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.boxplot(data=df.loc[df['new_emp'].isin(['Employed' , 'Not employed'])] , x="CreditGrade" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit grade vs loanAmount wrt employed or not(in gen)");
plt.xlabel("Credit Grades")
plt.ylabel("Original loan amount in dollars");
# a boxplot that depicts the relationship between Credit Grade and LoanOriginalAmount with new_emp
# as the third variable in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.boxplot(data=df.query("LoanOriginalAmount > 10000").loc[df['new_emp'].isin(['Employed' , 'Not employed'])] , x="CreditGrade" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit grade vs loanAmount wrt employed or not(in elite)");
plt.xlabel("Credit Grades")
plt.ylabel("Original loan amount in dollars");
# a stripplot that depicts the relationship between Credit Grade and LoanOriginalAmount with new_emp
# as the third variable in case of genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(data=df.loc[df['new_emp'].isin(['Employed' , 'Not employed'])] , x="CreditGrade" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit grade vs loanAmount wrt employed or not(in gen)");
plt.xlabel("Credit Grades")
plt.ylabel("Original loan amount in dollars");
# a stripplot that depicts the relationship between Credit Grade and LoanOriginalAmount with new_emp
# as the third variable in case of elite loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(data=df.query("LoanOriginalAmount > 10000").loc[df['new_emp'].isin(['Employed' , 'Not employed'])] , x="CreditGrade" , y="LoanOriginalAmount" , hue = "new_emp" );
plt.title("credit grade vs loanAmount wrt employed or not(in elite)");
plt.xlabel("Credit Grades")
plt.ylabel("Original loan amount in dollars");
# a heatmap of all the columns with |correlation| > 0.5 to fing relations
fig, ax = plt.subplots(figsize=(20 , 20))
df_corr = df.corr()
df_corr[np.abs(df_corr)<.50] = 0
mask = np.zeros_like(df_corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(df_corr , mask=mask , cmap='coolwarm' , linewidths=1)
plt.xlabel("columns")
plt.ylabel("columns")
plt.title("|correlation| > 0.5");
# from the above matrix heatmap we can derive a smaller heatmap
plt.figure(figsize=(15,15))
sns.heatmap(df[['BorrowerAPR', 'BorrowerRate', 'LenderYield', 'EstimatedEffectiveYield','EstimatedLoss' , 'EstimatedReturn' , 'ProsperRating (numeric)' , 'ProsperScore', 'CreditGrade', 'CreditScoreAvg', 'Term','ListingCategory']].corr() , annot=True);
plt.xlabel("columns")
plt.ylabel("columns")
plt.title("correlations");
# exploring the relationship between CreditScoreAvg ProsperScore using heatmap
sns.heatmap(df[['CreditScoreAvg' , 'ProsperScore']].corr() , annot=True)
plt.title("credit score and Prosper Score");
plt.xlabel("columns")
plt.ylabel("columns");
# exploring the relationship between term and Creditscoreavg using boxplot
fig, ax = plt.subplots(figsize=(20 , 10))
sns.boxplot(x ='Term', y ='CreditScoreAvg', data = df,
);
plt.title("credit score and Term");
plt.xlabel("Terms for loan in months")
plt.ylabel("Agerage Cedit scores");
# exploring the relationship between CreditScoreAvg and BorrowerAPR using stripplot
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(data=df , x="CreditScoreAvg" , y="BorrowerAPR" , jitter = True, dodge = True);
plt.title("credit score and BorrowerAPR");
plt.xlabel("Agerage Cedit scores");
# exploring the relationship between ProsperRating (numeric) and BorrowerAPR using stripplot
fig, ax = plt.subplots(figsize=(20 , 10))
sns.stripplot(x ='ProsperRating (numeric)', y ='BorrowerAPR', data = df,
jitter = True, dodge = True);
plt.title("Prosper Rating and BorrowerAPR");
# Plotting Correlationds using scattermatrix
df_scatter = df[['BorrowerAPR', 'BorrowerRate', 'LenderYield', 'EstimatedEffectiveYield','EstimatedLoss' , 'EstimatedReturn' , 'ProsperRating (numeric)' , 'ProsperScore', 'CreditGrade', 'CreditScoreAvg', 'Term','ListingCategory']]
sns.pairplot(df_scatter)
plt.xlabel("columns")
plt.ylabel("columns")
plt.title("correlations");
From the above visualizations we get to know
# exploring the relationship between BorrowerRate and LoanStatus using boxplot in genral loans
fig, ax = plt.subplots(figsize=(20 , 10))
sns.boxplot(x ='BorrowerRate', y ='LoanStatus', data = df)
plt.title("Loan STates and Borrower Rate (in gen)");
plt.ylabel("Status crrently of the loan");
# exploring the relationship between BorrowerRate and LoanStatus using boxplot in elite loan
fig, ax = plt.subplots(figsize=(20 , 10))
sns.boxplot(x ='BorrowerRate', y ='LoanStatus', data = df.query("LoanOriginalAmount > 10000"))
plt.title("Loan STates and Borrower Rate (in elite)");
plt.ylabel("Status crrently of the loan");
# exploring the relationship between ProsperRating and LoanStatus using heatmap in gen loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.pivot_table(index='LoanStatus',
columns='ProsperRating (numeric)',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap , annot=True)
plt.title("Loan STates and ProsperRating (in gen)")
plt.ylabel("Status crrently of the loan");
# exploring the relationship between ProsperRating and ListingKey using heatmap in elite loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.query("LoanOriginalAmount > 10000").pivot_table(index='LoanStatus',
columns='ProsperRating (numeric)',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap , annot=True)
plt.title("Loan STates and ProsperRating (in elite)");
plt.ylabel("Status crrently of the loan");
# exploring the relationship between LoanStatus and Term using heatmap in genral loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.pivot_table(index='LoanStatus',
columns='Term',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap , annot=True)
plt.title("Loan STates and Term (in gen)");
plt.xlabel("Terms for loan in months")
plt.ylabel("Status crrently of the loan");
# exploring the relationship between LoanStatus and Term using heatmap in elite loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.query("LoanOriginalAmount > 10000").pivot_table(index='LoanStatus',
columns='Term',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap , annot=True)
plt.title("Loan STates and Term (in elite)");
plt.xlabel("Terms for loan in months")
plt.ylabel("Status crrently of the loan");
# exploring the relationship between LoanStatus and CreditScoreAvg using heatmap in genral loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.pivot_table(index='LoanStatus',
columns='CreditScoreAvg',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap )
plt.title("Loan STates and CreditScoreAvg (in gen)");
plt.xlabel("Average Credit Scores")
plt.ylabel("Status crrently of the loan");
# exploring the relationship between LoanStatus and CreditScoreAvg using heatmap in elite loan
fig, ax = plt.subplots(figsize=(20 , 10))
df_heatmap = df.query("LoanOriginalAmount > 10000").pivot_table(index='LoanStatus',
columns='CreditScoreAvg',
values='ListingKey',
aggfunc=lambda x: len(x.unique()))
sns.heatmap(df_heatmap )
plt.title("Loan STates and CreditScoreAvg (in gen)");
plt.xlabel("Average Credit Scores")
plt.ylabel("Status crrently of the loan");
From the above visualizations we get to know-
# exploring the relationship between ListingCategory and LoanOriginalAmount using barplot in genral loan
new = ["Not Available" , "Debt Consolidation", "Home Improvement" , "Business" , "Personal Loan" , "Student Use" , "Auto" , "Other" , "Baby&Adoption",
"Boat" , "Cosmetic Procedure" , "Engagement Ring" , "Green Loans" , "Household Expenses" , "Large Purchases" , "Medical/Dental" , "Motorcycle" ,
"RV" , "Taxes" , "Vacation" , "Wedding Loans"]
fig, ax = plt.subplots(figsize=(20 , 10))
cmap = sns.cubehelix_palette(21 , reverse=True)
sns.barplot(data=df , x="ListingCategory" , y="LoanOriginalAmount" ,palette=cmap )
labels = [item.get_text() for item in ax.get_xticklabels()]
for i in range(21):
labels[i] = new[int(labels[i])]
ax.set_xticklabels(labels)
plt.xticks(rotation=25);
plt.title("Loan Categories and Loan amount");
plt.xlabel("Loan Ctegories")
plt.ylabel("Original loan amount in dollars");
- Having credit score >729.5 , grade AA , A , B, D , C and Prosper Score in between (6-9) and having a job of Executive Followed by computer programmer demanding a loan for Debt Consolidation will ensure higher chances to get an Elite Loan(loan>10000).
- Lender Yield is directly proportional to borrowAPR , Borrow Rate and Estimated Effective Yield